NOTE: The images may not render in the HTML export; please refer to the notebook itself to view them.
# NOTE: lines beginning with `%` or `!` are Jupyter/Colab magics and shell
# commands; this file is a notebook export, not a plain Python script.
import numpy as np # mathematical manipulations
import pandas as pd # data manipulations
# for visualisation
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import cv2
# splitting into train and test sets
from sklearn.model_selection import train_test_split
# Neural Network libraries
import tensorflow as tf
# ignore warnings
import warnings
warnings.filterwarnings("ignore")
# initialize random number generator
import random
# set the seed for consistent results for multiple runs
# NOTE(review): only NumPy's RNG is seeded; `random` and TensorFlow keep
# their own unseeded state, so results may still vary between runs
seed = 7
np.random.seed(seed)
# copy data to current working directory (Colab: from mounted Google Drive)
!cp '/content/drive/MyDrive/Colab Notebooks/images.npy' .
# verify data copy
!ls -l
# load dataset as NumPy array
# allow_pickle=True is required because each record holds Python objects
# (the image array plus a list of annotation dicts)
dataset = np.load(file='/content/images.npy',allow_pickle=True)
# verify type of dataset
type(dataset)
# verify shape of dataset
dataset.shape
# observe the first record
dataset[0]
# seeing image part
dataset[0,0]
# observing mask, label and bounding box
dataset[0,1]
# separating data into images (column 0) and masks/annotations (column 1)
images = dataset[:,0]
masks = dataset[:,1]
# verify images shape
images.shape
# verify masks shape
masks.shape
# preview one sample image together with its raw annotation
plt.imshow(images[408])
display(masks[408])
# display five random images with bounding boxes - these correspond to the
# mask locations
for i in range(0, 5):
    # pick a random image index
    img_num = np.random.randint(0, images.shape[0])
    # work on a copy: cv2.rectangle draws in place, and the original code
    # permanently drew green boxes onto the dataset images used for training
    img = images[img_num].copy()
    # draw rectangle(s) as per the bounding-box information
    for dictionary in masks[img_num]:
        # image height and width used to scale the normalized coordinates
        image_height = dictionary['imageHeight']
        image_width = dictionary['imageWidth']
        # top-left box corner; cv2.rectangle requires integer pixel
        # coordinates (float32 values raise an error in modern OpenCV)
        xmin = int(dictionary['points'][0]['x'] * image_width)
        ymin = int(dictionary['points'][0]['y'] * image_height)
        # bottom-right box corner
        xmax = int(dictionary['points'][1]['x'] * image_width)
        ymax = int(dictionary['points'][1]['y'] * image_height)
        # add bounding box (green, thickness 2)
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
    # draw image using matplotlib
    plt.figure(figsize=(10, 7))
    plt.imshow(img)
    plt.show()
# set constants (input size expected by MobileNetV2)
IMG_SIZE = 224
IMG_HEIGHT = 224
IMG_WIDTH = 224
IMG_DEPTH = 3
# split data into train, validation and test
# first split: 80% train / 20% held out
X_train, X_test, y_train, y_test = train_test_split(images, masks, test_size=0.2)
# second split of the held-out 20%: 16% validation / 4% test of the total
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.2)
print(f"Shape of X_train is '{X_train.shape}' and the shape of y_train is '{y_train.shape}'")
print(f"Shape of X_val is '{X_val.shape}' and the shape of y_val is '{y_val.shape}'")
print(f"Shape of X_test is '{X_test.shape}' and the shape of y_test is '{y_test.shape}'")
def get_images_masks(images, masks):
    """Resize images, preprocess them for MobileNetV2, and rasterise the
    bounding-box annotations into binary masks.

    Parameters
    ----------
    images : array of per-sample image arrays (H x W x C, or H x W grayscale)
    masks  : array of per-sample lists of annotation dicts; each dict has
             'points' with normalized x/y coordinates (assumed in [0, 1] --
             TODO confirm against the annotation tool)

    Returns
    -------
    img_batch  : float array, shape (N, IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH),
                 preprocessed with mobilenet_v2.preprocess_input
    mask_batch : float array, shape (N, IMG_HEIGHT, IMG_WIDTH), with 1s
                 inside every annotated box and 0s elsewhere
    """
    # input image is of size IMG_HEIGHT x IMG_WIDTH x IMG_DEPTH
    img_batch = np.zeros((images.shape[0], IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH))
    # mask is of size IMG_HEIGHT x IMG_WIDTH
    mask_batch = np.zeros((masks.shape[0], IMG_HEIGHT, IMG_WIDTH))
    for i in range(images.shape[0]):
        # resize the image to the model's input size
        img = cv2.resize(images[i], dsize=(IMG_HEIGHT, IMG_WIDTH),
                         interpolation=cv2.INTER_CUBIC)
        if img.ndim == 2:
            # grayscale image: replicate the single channel to 3 channels.
            # BUGFIX: the original bare `except ... continue` converted the
            # image and then skipped it entirely, leaving all-zero rows in
            # both img_batch and mask_batch for every grayscale sample.
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        else:
            # keep only the first 3 channels (discards an alpha channel)
            img = img[:, :, :3]
        # pre-process input as per the pre-trained model and add to the batch
        img_batch[i] = tf.keras.applications.mobilenet_v2.preprocess_input(
            np.array(img, dtype=np.float32))
        # rasterise every annotated box into the mask
        for dictionary in masks[i]:
            # min (top-left) mask co-ordinates, scaled to target size
            xmin = int(dictionary['points'][0]['x'] * IMG_WIDTH)
            ymin = int(dictionary['points'][0]['y'] * IMG_HEIGHT)
            # max (bottom-right) mask co-ordinates
            xmax = int(dictionary['points'][1]['x'] * IMG_WIDTH)
            ymax = int(dictionary['points'][1]['y'] * IMG_HEIGHT)
            # set all pixels within the box to 1
            mask_batch[i][ymin:ymax, xmin:xmax] = 1
    return img_batch, mask_batch
# Change to tensors: preprocessed image batches and binary mask batches
X_train , y_train = get_images_masks(X_train,y_train)
X_val, y_val = get_images_masks(X_val, y_val)
X_test, y_test = get_images_masks(X_test, y_test)
print(f"Shape of X_train is '{X_train.shape}' and the shape of y_train is '{y_train.shape}'")
print(f"Shape of X_val is '{X_val.shape}' and the shape of y_val is '{y_val.shape}'")
print(f"Shape of X_test is '{X_test.shape}' and the shape of y_test is '{y_test.shape}'")
# visualise masks and images for verification
# pick two random training indices
idx = np.random.randint(0,X_train.shape[0],2)
# initialising subplots: row 0 holds the images, row 1 the matching masks
figure, ax = plt.subplots(nrows=2, ncols=2)
# setting figure parameters
figure.set_figheight(15)
figure.set_figwidth(15)
# setting images and masks to axis (each column is one image/mask pair)
ax[0][0].imshow(X_train[idx][0])
ax[1][0].imshow(y_train[idx][0])
ax[0][1].imshow(X_train[idx][1])
ax[1][1].imshow(y_train[idx][1])
plt.show()
We will be using Transfer Learning technique to get a pre-trained model(MobileNet in our case) and form a U-Net architecture for segmentation. Clearly we are going for semantic segmentation because all we are detecting is a face. The same problem can be extended for instance segmentation and a Mask R-CNN can be used.
The pre-trained MobileNet will be used as the encoder part of the U-Net Architecture. The decoder part will be built using upsampling layers.
# Define input layer (named so the decoder can reference it as a skip source)
input_tensor = tf.keras.layers.Input((IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH), name='input_img')
# download the pre-trained MobileNetV2 encoder: ImageNet weights, no
# classifier head; alpha=0.5 halves the channel width for a lighter model
pre_trained_model = tf.keras.applications.mobilenet_v2.MobileNetV2(input_tensor= input_tensor,
                                                                   alpha=0.5,
                                                                   weights='imagenet',
                                                                   include_top=False)
print(f'Number of layers in MobileNet are {len(pre_trained_model.layers)}')
# describing the pre trained model
pre_trained_model.summary()
def conv2d_block(input_tensor, n_filters):
    """Apply two Conv2D(3x3) -> BatchNorm -> ReLU stages to `input_tensor`,
    each with `n_filters` filters, and return the resulting tensor."""
    x = input_tensor
    for _ in range(2):
        x = tf.keras.layers.Conv2D(n_filters, (3, 3),
                                   kernel_initializer='he_normal',
                                   padding='same')(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation('relu')(x)
    return x
def build_decoder_block(input, layer_name, number_of_filters):
    """One U-Net decoder step: upsample `input` by 2x, concatenate the
    encoder feature map named `layer_name` (the skip connection), then
    refine the result with a double convolution block."""
    upsampled = tf.keras.layers.UpSampling2D()(input)
    skip = pre_trained_model.get_layer(layer_name).output
    merged = tf.keras.layers.Concatenate()([upsampled, skip])
    return conv2d_block(merged, number_of_filters)
def build_decoder():
    """Build the U-Net decoder on top of the MobileNetV2 encoder.

    Starting from the deepest encoder activation, walk the skip
    connections from deep to shallow, shrinking the filter count
    at each step, and return the final decoder tensor.
    """
    skip_connection_names = ["input_img", "block_1_expand_relu",
                             "block_3_expand_relu", "block_6_expand_relu"]
    filters = [16, 32, 48, 64]
    # decoding starts at the deepest encoder feature map
    x = pre_trained_model.get_layer('block_13_expand_relu').output
    # iterate the skip layers and filter counts in reverse (deep -> shallow)
    for layer_name, number_of_filters in zip(reversed(skip_connection_names),
                                             reversed(filters)):
        x = build_decoder_block(x, layer_name, number_of_filters)
    return x
decoder = build_decoder()
def build_model(input, decoder):
    """Attach a 1x1 sigmoid convolution head to `decoder` (per-pixel
    foreground probability) and wrap input/output into a Keras model."""
    mask_head = tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid')
    outputs = mask_head(decoder)
    return tf.keras.Model(inputs=[input], outputs=[outputs])
# assemble the full segmentation model from encoder input and decoder output
final_model = build_model(pre_trained_model.input,decoder)
final_model.summary()
We will use Dice Coefficient for evaluation
# define DICE-COEFFICIENT
# NOTE(review): this module-level `smooth` is shadowed by the `smooth=1`
# default parameter of dice_coef below, so it is effectively unused
smooth = tf.keras.backend.epsilon() # prevent zero error
def dice_coef(y_true, y_pred, smooth=1):
    """
    Dice = (2*|X & Y|)/ (|X|+ |Y|)
         = 2*sum(|A*B|)/(sum(A^2)+sum(B^2))
    `smooth` avoids division by zero for empty masks.
    ref: https://arxiv.org/pdf/1606.04797v1.pdf
    """
    K = tf.keras.backend
    overlap = K.sum(K.abs(y_true * y_pred), axis=-1)
    denominator = K.sum(K.square(y_true), -1) + K.sum(K.square(y_pred), -1)
    return (2. * overlap + smooth) / (denominator + smooth)
# loss corresponding to the Dice coefficient
def dice_coef_loss(y_true, y_pred):
    """Loss that decreases as the Dice coefficient approaches 1."""
    similarity = dice_coef(y_true, y_pred)
    return 1 - similarity
# compile the model
# NOTE: `lr`, `decay=0.0` and `epsilon=None` were legacy TF1-style
# arguments; in TF2 the canonical name is `learning_rate`, and the library
# defaults already match (epsilon=1e-7, no decay), so behaviour is unchanged
opt = tf.keras.optimizers.Adam(learning_rate=1e-4, beta_1=0.9, beta_2=0.999, amsgrad=False)
# metrics: Dice coefficient plus pixel-level recall and precision
metrics = [dice_coef, tf.keras.metrics.Recall(), tf.keras.metrics.Precision()]
final_model.compile(loss=dice_coef_loss, optimizer=opt, metrics=metrics)
BATCH_SIZE = 8
EPOCHS = 50
# model callbacks: shrink the learning rate when val_loss plateaus, and
# keep only the best (lowest val_loss) checkpoint on disk
callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4, min_lr=1e-6, verbose=1, mode="min"),
    tf.keras.callbacks.ModelCheckpoint("face_mask_model.h5", monitor="val_loss", verbose=1, save_best_only=True, mode="min")
]
# Keras expects integer step counts; np.ceil returns floats
train_step_size = int(np.ceil(X_train.shape[0] / BATCH_SIZE))
val_step_size = int(np.ceil(X_val.shape[0] / BATCH_SIZE))
test_step_size = int(np.ceil(X_test.shape[0] / BATCH_SIZE))
train_step_size, val_step_size, test_step_size
# train the model; pass batch_size explicitly -- the step sizes above were
# computed for BATCH_SIZE=8, while fit() would otherwise default to 32 and
# run out of data before steps_per_epoch batches were drawn
training_history = final_model.fit(X_train, y_train,
                                   batch_size=BATCH_SIZE,
                                   steps_per_epoch=train_step_size,
                                   validation_data=(X_val, y_val),
                                   validation_steps=val_step_size,
                                   callbacks=callbacks,
                                   epochs=EPOCHS)
def plot_train_history_with_epochs(training_history):
    '''
    Plot training and validation loss against epoch number, using the
    History object returned by model.fit().
    '''
    # pull both loss series out of the fit history
    history = training_history.history
    train_loss = history['loss']
    val_loss = history['val_loss']
    epochs = range(len(train_loss))
    plt.figure(figsize=(10, 8))
    # one curve per series, same labels as before
    for series, tag in ((train_loss, 'training loss'),
                        (val_loss, 'validation loss')):
        plt.plot(epochs, series, label=tag)
    plt.legend(loc='upper right')
    plt.xlabel('Number of Epochs')
    plt.ylabel('Training and Validation loss')
    plt.title('Training and Validation loss v/s Epochs')
    plt.show()
# load best saved model (checkpoint written by ModelCheckpoint); the custom
# loss/metric must be supplied so Keras can deserialise them
loaded_model = tf.keras.models.load_model('face_mask_model.h5',
                                          custom_objects={'dice_coef_loss':dice_coef_loss,'dice_coef':dice_coef})
# evaluating the loaded model on train data
train_metrics = loaded_model.evaluate(X_train,y_train,verbose=0,steps=train_step_size)
# evaluating the loaded model on validation data
val_metrics = loaded_model.evaluate(X_val,y_val,verbose=0,steps=val_step_size)
# evaluating the loaded model on test data
test_metrics = loaded_model.evaluate(X_test,y_test,verbose=0,steps=test_step_size)
# tabulate the metrics; evaluate() returns them in compile order:
# [loss, dice_coef, recall, precision]
metrics_df = pd.DataFrame(index=['Loss','Dice Coefficient', 'Recall','Precision'],columns=['Training','Validation','Testing'],
                          data=[[train_metrics[0],val_metrics[0],test_metrics[0]],
                                [train_metrics[1],val_metrics[1],test_metrics[1]],
                                [train_metrics[2],val_metrics[2],test_metrics[2]],
                                [train_metrics[3],val_metrics[3],test_metrics[3]]])
metrics_df
# plot training/validation loss curves from the fit history
plot_train_history_with_epochs(training_history)
# copy prediction image from the mounted drive
!cp '/content/drive/MyDrive/Colab Notebooks/Part 1Test Data - Prediction Image.jpeg' .
# verify copying
!ls -l
# load the prediction image (cv2.imread returns BGR channel order)
file_path = '/content/Part 1Test Data - Prediction Image.jpeg'
img = cv2.imread(file_path)
# resize image to the model's input size
img = cv2.resize(img, dsize= (IMG_WIDTH,IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)
# convert BGR to RGB to match training preprocessing
img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
# pre-process as per the model
processed_img = tf.keras.applications.mobilenet_v2.preprocess_input(np.array(img, dtype=np.float32))
# add a leading batch dimension -> (1, H, W, 3)
processed_img = np.expand_dims(processed_img,axis=0)
processed_img.shape
# predicting using the best saved model
y_pred = loaded_model.predict(processed_img)
y_pred.shape
# binarise the probability map at threshold 0.7, then resize to display size
pred_mask = cv2.resize((1.0*(y_pred[0] > 0.7)), (IMG_WIDTH,IMG_HEIGHT))
# visualise input image and predicted mask side by side
# initialising subplots
figure, ax = plt.subplots(nrows=1, ncols=2)
# setting figure parameters
figure.set_figheight(15)
figure.set_figwidth(15)
# setting images and masks to axis
ax[0].imshow(processed_img[0])
ax[0].axis('off')
ax[1].imshow(pred_mask)
ax[1].axis('off')
plt.show()
The face images in this dataset were collected from Pinterest and aligned using the dlib library. Most of the required libraries have already been imported above; we will be re-using them.
# operating system library
import os
# for visualising images in a grid
from mpl_toolkits.axes_grid1 import ImageGrid
# for pre-processing - encode labels, scaling
from sklearn.preprocessing import LabelEncoder,StandardScaler
# searching for hyperparameter tuning
from sklearn.model_selection import GridSearchCV
# for dimensionality reduction using Principal Component Analysis
from sklearn.decomposition import PCA
# ML model libraries
from sklearn import svm, metrics
# copy the dataset archive to the current directory
!cp '/content/drive/MyDrive/Colab Notebooks/Aligned+Face+Dataset+from+Pinterest+-+CV+project+1.zip' .
# verify copying
!ls -l
# unzip the data either using command or zipfile module
!unzip /content/Aligned+Face+Dataset+from+Pinterest+-+CV+project+1.zip
# verify after unzipping the content
!ls -l
# Observing data structure: one sub-directory per person under PINS/
!ls -l pins/
!ls -l pins/PINS
!ls -l pins/PINS | wc -l
!ls -l PINS | wc -l
!ls -l PINS/
!ls -l 'PINS/pins_Jim Parsons'
BASE_PATH = '/content/PINS'
# initialise the dataframe to hold the metadata
# NUMBER_OF_IMAGES is redundant (it equals len(FILES)) and could be omitted
metadata_df = pd.DataFrame(columns=['PERSON','PERSON_BASE_PATH','FILES', 'NUMBER_OF_IMAGES'])
for dirs in os.listdir(BASE_PATH): # list all directories or persons
    if 'PINS_' in dirs.upper(): # case-insensitive check for the pinterest prefix
        person = dirs[5:].replace(' ','_').upper() # strip 'pins_' prefix, replace space with _
        person_base_path = os.path.join(BASE_PATH,dirs) # get base directory path of each person
        files = [] # initialise a list to hold all corresponding files
        for img_path in os.listdir(os.path.join(BASE_PATH,dirs)):
            files.append(img_path) # collect each image file name
        number_of_images = len(files) # calculate number of images
        # append to data-frame (.loc on the next integer index appends a row)
        metadata_df.loc[metadata_df.shape[0]] = [person , person_base_path , files , number_of_images]
# verify top records of data-frame
metadata_df.head()
# verify shape of data-frame
metadata_df.shape
# checking if all the persons are unique
metadata_df['PERSON'].nunique()
# verifying total number of images
total_images = metadata_df['NUMBER_OF_IMAGES'].sum()
total_images
We will try to visualise randomly 10 images for 10 persons.
# get 10 random person indices (sampled with replacement, so a person may repeat)
person_idices = np.random.choice(metadata_df.shape[0],10)
# initialise figure parameters: a 10x10 grid, one row per sampled person
fig = plt.figure(1, (100, 100))
grid = ImageGrid(fig, 111, nrows_ncols=(10, 10), axes_pad=0.1)
counter = 0
for person_idx in person_idices:
    # get a record from the dataframe
    record = metadata_df.iloc[person_idx]
    person = record['PERSON']
    person_base_path = record['PERSON_BASE_PATH']
    # BUGFIX: np.random.choice(n) already samples indices 0..n-1, so the
    # previous `- 1` wrongly excluded each person's last image from display
    number_of_images = record['NUMBER_OF_IMAGES']
    files = record['FILES']
    # get randomly 10 image indices for this person
    image_indices = np.random.choice(number_of_images, 10)
    display_title = True  # write the person's name only once per row
    # display images for a person
    for img_idx in image_indices:
        ax = grid[counter]
        if display_title:
            ax.text(1700, 105, person, verticalalignment='center',fontsize=60)
            display_title = False
        img_path = person_base_path + '/' + files[img_idx]
        img = cv2.imread(img_path)
        img = cv2.resize(img, (150, 150))
        ax.axis("off")
        # cv2 loads BGR; convert to RGB for matplotlib
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        counter += 1
plt.show()
# define model params (VGG-Face expects 224x224 RGB input)
IMG_SIZE = 224
# load a model for assigning the pre-trained weights
def vgg_face(img_size=224):
    """Build the VGG-Face architecture as a Sequential model.

    Five convolutional stacks (ZeroPadding before every 3x3 conv, MaxPool
    after each stack) followed by a fully-convolutional classifier head
    ending in a 2622-way softmax. The layer sequence matches the published
    vgg_face_weights.h5 checkpoint exactly so the weights can be loaded.
    """
    model = tf.keras.models.Sequential()
    # first padding layer also fixes the input shape
    model.add(tf.keras.layers.ZeroPadding2D((1,1),input_shape=(img_size,img_size, 3)))
    # (convs per stack, filter width) for the five VGG stacks
    conv_stacks = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]
    first_conv = True
    for depth, width in conv_stacks:
        for _ in range(depth):
            if not first_conv:
                model.add(tf.keras.layers.ZeroPadding2D((1,1)))
            first_conv = False
            model.add(tf.keras.layers.Convolution2D(width, (3, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2,2), strides=(2,2)))
    # fully-convolutional classifier head
    model.add(tf.keras.layers.Convolution2D(4096, (7, 7), activation='relu'))
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Convolution2D(4096, (1, 1), activation='relu'))
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Convolution2D(2622, (1, 1)))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Activation('softmax'))
    return model
# copy pre-trained VGG-Face weights to the current directory
!cp '/content/drive/MyDrive/Colab Notebooks/vgg_face_weights.h5' .
# verify the contents
!ls -l
# build a placeholder model for loading pre-trained weights
model = vgg_face(IMG_SIZE)
# load weights to the placeholder model
model.load_weights('/content/vgg_face_weights.h5')
# getting input layer
model.layers[0].input
# getting output layer (softmax over 2622 identities)
model.layers[-1].output
# getting embedding layer (the flatten just before the softmax)
model.layers[-2].output
# descriptor model: maps an image to its face-embedding vector
vgg_face_descriptor = tf.keras.models.Model(inputs=model.layers[0].input, outputs=model.layers[-2].output)
# getting the dimensions of each embedding vector
embedding_dimensions = vgg_face_descriptor.output.shape[1]
embedding_dimensions
# generate an embedding vector for every image in the dataset
embedding_vectors = np.zeros((total_images,embedding_dimensions)) # initialise array to hold embedding vectors
embedding_counter = 0
classes = []  # person name (label) per embedding row, aligned by index
for idx in metadata_df.index: # for each record in the data-frame
    each_person = metadata_df.iloc[idx]
    each_person_name = each_person['PERSON']
    each_person_base_path = each_person['PERSON_BASE_PATH']
    each_person_images = each_person['FILES']
    for each_img_idx in range(0,each_person['NUMBER_OF_IMAGES']): # for each image of a person
        # Read - resize - normalize the image
        img_path = each_person_base_path + '/' + each_person_images[each_img_idx]
        img = tf.keras.preprocessing.image.load_img(img_path)
        img_arr = tf.keras.preprocessing.image.img_to_array(img.resize([IMG_SIZE,IMG_SIZE]))
        # Normalising pixel values from [0-255] to [0-1]: scale RGB values to interval [0,1]
        img_arr = (img_arr / 255.).astype(np.float32)
        # get the embedding vector for each image tensor
        embedding_vectors[embedding_counter] = vgg_face_descriptor.predict(np.expand_dims(img_arr,axis=0))[0]
        embedding_counter +=1
        # record the matching label
        classes.append(each_person_name)
# verifying the shape of the embedding-vectors
embedding_vectors.shape
# verifying few records of the embedding-vectors
embedding_vectors[:10]
np.array(classes).shape
For calculating the similarity or dissimilarity between embedding vectors, we can use various distance metrics.
Here, we will make use of the L2 (squared Euclidean) distance.
def l2_distance_metric(emb1, emb2):
    '''
    Squared L2 (Euclidean) distance: the sum of squared element-wise
    differences between the two embedding vectors.
    '''
    diff = emb1 - emb2
    return np.sum(diff * diff)
def get_normalized_img_arr(img_path):
    '''
    Load the image at `img_path`, resize it to IMG_SIZE x IMG_SIZE and
    return it as a float32 array scaled from [0, 255] into [0, 1].
    '''
    loaded = tf.keras.preprocessing.image.load_img(img_path)
    resized = loaded.resize([IMG_SIZE, IMG_SIZE])
    arr = tf.keras.preprocessing.image.img_to_array(resized)
    return (arr / 255.).astype(np.float32)
def get_img_arr_name(idx):
    '''
    For the person at row `idx` of metadata_df, return the person's name
    together with two randomly chosen, normalized image arrays.
    NOTE(review): the two random indices may coincide, in which case the
    "pair" is the same image twice -- confirm this is acceptable.
    '''
    person_record = metadata_df.iloc[idx]
    base_path = person_record['PERSON_BASE_PATH']
    file_names = person_record['FILES']
    # two random image indices for this person
    random_images = np.random.randint(0, person_record['NUMBER_OF_IMAGES'], 2)
    # read - resize - normalize both images
    img_arr1 = get_normalized_img_arr(base_path + '/' + file_names[random_images[0]])
    img_arr2 = get_normalized_img_arr(base_path + '/' + file_names[random_images[1]])
    return person_record['PERSON'], img_arr1, img_arr2
def get_positive_negative_pairs():
    '''
    Sample two random people from metadata_df and return:
      positive_pair -- two embeddings/images of the first person
      negative_pair -- one embedding/image of each person
    Each pair has the shape ((name, name), (emb, emb), (img, img)).
    '''
    random_pair_indices = np.random.randint(0, metadata_df.shape[0], 2)
    # first person: two images
    person1, img_arr11, img_arr12 = get_img_arr_name(random_pair_indices[0])
    # second person: two images
    person2, img_arr21, img_arr22 = get_img_arr_name(random_pair_indices[1])

    def embed(arr):
        # run a single image through the VGG-Face descriptor network
        return vgg_face_descriptor.predict(np.expand_dims(arr, axis=0))[0]

    emb11 = embed(img_arr11)
    emb12 = embed(img_arr12)
    emb21 = embed(img_arr21)
    emb22 = embed(img_arr22)  # computed for parity with the original (unused)
    positive_pair = ((person1, person1), (emb11, emb12), (img_arr11, img_arr12))
    negative_pair = ((person1, person2), (emb11, emb21), (img_arr11, img_arr21))
    return positive_pair, negative_pair
# display the images of a pair with the distance between their embeddings
def show_pair(pair):
    """Show the two images of a (names, embeddings, images) pair side by
    side, titled with the L2 distance between the two embeddings."""
    names, embeddings, images_pair = pair
    figure, ax = plt.subplots(nrows=1, ncols=2,constrained_layout=True)
    figure.set_figheight(15)
    figure.set_figwidth(15)
    figure.suptitle(f'Distance = {l2_distance_metric(embeddings[0], embeddings[1]):.4f}', fontsize=16)
    # one subplot per image, labelled with the person's name
    for axis, image, title in zip(ax, images_pair, names):
        axis.imshow(image)
        axis.set_title(title)
        axis.axis('off')
    plt.tight_layout()
    plt.show()
# sample one positive and one negative pair and inspect their distances
positive_pair,negative_pair = get_positive_negative_pairs()
positive_pair
show_pair(positive_pair)
negative_pair
show_pair(negative_pair)
# every 9th example goes in test data and the rest go in train data
# (deterministic ~1/9 holdout; rows are grouped per person, so every person
# contributes examples to both sides of the split)
train_idx = np.arange(embedding_vectors.shape[0]) % 9 != 0
test_idx = np.arange(embedding_vectors.shape[0]) % 9 == 0
# train examples (embeddings)
X_train = embedding_vectors[train_idx]
# test examples (embeddings)
X_test = embedding_vectors[test_idx]
classes = np.array(classes)
#train labels
y_train = classes[train_idx]
#test labels
y_test = classes[test_idx]
print(f'X_train shape : ({X_train.shape[0]},{X_train.shape[1]})')
print(f'y_train shape : ({y_train.shape[0]},)')
print(f'X_test shape : ({X_test.shape[0]},{ X_test.shape[1]})')
print(f'y_test shape : ({y_test.shape[0]},)')
le = LabelEncoder() # initialise the encoder
# encode string labels to integers; fit on train, re-use mapping for test
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)
y_train_encoded
y_test_encoded
For PCA it is important to have normalized/scaled feature values so that the components are not biased towards specific feature values
scaler = StandardScaler() # initialise the scaler
# scale the feature values; fit only on train to avoid test leakage
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)
As each of our embeddings have 2622 features, we can reduce the dimensions.
PCA is a way of linearly transforming the data such that most of the information in the data is contained within a smaller number of features called components.
def evaluate_the_model(model, model_name, X_train, y_train, X_test, y_test):
    """Evaluate a fitted classifier and record its results.

    Side effects: stores predictions in the module-level `predictions` dict,
    train/test accuracies in `accuracies`, appends [name, accuracy%] to
    `model_lists`, and prints a short report to stdout.
    """
    # predict on test data
    y_pred = model.predict(X_test)
    predictions[model_name] = y_pred
    train_accuracy = model.score(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)
    accuracies[model_name] = [train_accuracy, test_accuracy]
    # format directly to two decimals: the previous `round(x, 4) * 100`
    # re-introduced float artifacts such as "98.77000000000001%"
    print(f'The accuracy score of model on train data is {train_accuracy * 100:.2f}%')
    print(f'The accuracy score of model on test data is {test_accuracy * 100:.2f}%')
    print()
    acc = metrics.accuracy_score(y_test, y_pred)
    print('Accuracy Score :', '%0.2f' % acc)
    model_lists.append([model_name, acc * 100])
# dictionary holding accuracy of the various models
# {model: [train_accuracy,test_accuracy]}
accuracies = {}
# dictionary holding predictions of the various models
# {model: [predictions]}
predictions = {}
model_lists = [] # holds [model_name, test_accuracy%] rows for a summary table
pca = PCA(n_components=500) # initialise PCA with 500 components
# transform the scaled feature values; fit only on train to avoid leakage
X_train_pca = pca.fit_transform(X_train_std)
X_test_pca = pca.transform(X_test_std)
# hyper-parameter grid to search over
param_grid = {'C': [0.1,1,5], 'gamma': [0.01,0.001],'kernel': ['rbf', 'linear']}
# grid search classifier (5-fold CV, all cores)
svm_grid= GridSearchCV(svm.SVC(), param_grid, verbose = 2,cv=5, n_jobs = -1)
# fit on the PCA-reduced training samples
svm_grid.fit(X_train_pca, y_train_encoded)
# getting the best parameters
svm_grid.best_params_
# evaluate the tuned model and record its scores
evaluate_the_model(svm_grid,'SVM with Grid Search CV',X_train_pca,y_train_encoded,X_test_pca,y_test_encoded)
model_df = pd.DataFrame(model_lists, columns = ['Model', 'Accuracy Scores on Test'])
model_df
accuracy_df = pd.DataFrame(accuracies,index=['Train Accuracy','Test Accuracy'])
accuracy_df
# copy the two held-out test photos to the working directory
!cp '/content/drive/MyDrive/Colab Notebooks/Part 2 - Test Image - Dwayne Johnson4.jpg' .
!cp '/content/drive/MyDrive/Colab Notebooks/Part 2- Test Image - Benedict Cumberbatch9.jpg' .
!ls -l
def prediction_pipeline(img_path, actual_label):
    """Run the full recognition pipeline on one image file and print the
    actual label alongside the predicted (encoded and decoded) label.

    Pipeline: load/resize -> [0,1] scaling -> VGG-Face embedding ->
    standard scaling -> PCA -> SVM prediction -> label decoding.
    """
    loaded = tf.keras.preprocessing.image.load_img(img_path)
    img_arr = tf.keras.preprocessing.image.img_to_array(loaded.resize([IMG_SIZE, IMG_SIZE]))
    # scale RGB values from [0, 255] into [0, 1]
    img_arr = (img_arr / 255.).astype(np.float32)
    # embedding for the single-image batch
    embedding_vector = vgg_face_descriptor.predict(np.expand_dims(img_arr, axis=0))[0]
    # scale, then project onto the PCA components, exactly as at train time
    features = scaler.transform(np.expand_dims(embedding_vector, axis=0))
    features = pca.transform(features)
    # classify and decode the integer label back to the person's name
    y_pred = svm_grid.predict(features)
    print(f'Actual Label: {actual_label.upper()}')
    print(f'Predicted Encoded Label: {y_pred[0]}')
    print(f'Predicted Label: {le.inverse_transform(y_pred)[0]}')
# run the pipeline on the two held-out photos and display each image
prediction_pipeline('/content/Part 2- Test Image - Benedict Cumberbatch9.jpg','Benedict Cumberbatch')
tf.keras.preprocessing.image.load_img('/content/Part 2- Test Image - Benedict Cumberbatch9.jpg')
prediction_pipeline('/content/Part 2 - Test Image - Dwayne Johnson4.jpg','Dwayne Johnson')
tf.keras.preprocessing.image.load_img('/content/Part 2 - Test Image - Dwayne Johnson4.jpg')
# export the notebook itself to HTML
%%shell
jupyter nbconvert --to html '/content/drive/MyDrive/Colab Notebooks/ADVANCED_COMPUTER_VISION_PROJECT.ipynb'